In [1]:
import h2o
from h2o.estimators.xgboost import H2OXGBoostEstimator
In [2]:
%%capture
h2o.connect(ip="35.227.47.29")
h2o.no_progress()
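A quick, optional sanity check after connecting: print the cluster status to confirm which H2O cluster we just attached to (assumes the connect call above succeeded).
In [ ]:
h2o.cluster().show_status()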
In [3]:
# Import some data from Amazon S3
h2oDF = h2o.import_file(path="https://s3-us-west-1.amazonaws.com/dsclouddata/LendingClubData/LoansGoodBad.csv")
# Stratified Split into Train/Test
stratsplit = h2oDF["Bad_Loan"].stratified_split(test_frac=0.3, seed=12349453)
train = h2oDF[stratsplit=="train"]
test = h2oDF[stratsplit=="test"]
In [4]:
# Identify predictors and response
x = train.columns
y = "Bad_Loan"
x.remove(y)
# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()
train.head(5)
Out[4]:
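Because Bad_Loan is the stratification column, its class ratio should be roughly the same in both frames; a quick optional check with table():
In [ ]:
print(train[y].table())
print(test[y].table())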
In [12]:
%%time
XGB_GPU = H2OXGBoostEstimator(model_id="XGB_on_GPU", ntrees=200, max_depth=9, learn_rate=0.05, backend="gpu", gpu_id=0)
XGB_GPU.train(x=x, y=y, training_frame=train, validation_frame=test)
print "Accuracy AUC: " + str(XGB_GPU.auc())
In [13]:
%%time
XGB_CPU = H2OXGBoostEstimator(model_id="XGB_on_CPU", ntrees=200, max_depth=9, learn_rate=0.05, backend="cpu")
XGB_CPU.train(x=x, y=y, training_frame=train, validation_frame=test)
print "Accuracy AUC: " + str(XGB_CPU.auc())
In [16]:
import numpy as np
import matplotlib.pyplot as plt
plt.rcdefaults()
objects = ('GPU', 'CPU')
y_pos = np.arange(len(objects))
seconds = [11, 84]  # wall-clock training times (in seconds) reported by the %%time cells above
plt.barh(y_pos, seconds, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.xlabel('Seconds')
plt.title('XGBoost Training Time in Seconds')
plt.show()
In [15]:
# Assumed on-demand prices: $0.90/hr for the GPU instance, $0.70/hr for the CPU instance.
# Convert dollars/hr to cents/sec, then multiply by the measured training times so the chart is actually in cents.
GPU_Cost = (0.9 * 100 / 3600) * 11
CPU_Cost = (0.7 * 100 / 3600) * 84
objects = ('GPU Cost', 'CPU Cost')
y_pos = np.arange(len(objects))
cents = [GPU_Cost, CPU_Cost]
plt.barh(y_pos, cents, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.xlabel('Cents')
plt.title('Cost in Cents')
plt.show()
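Putting the two measurements together, the speedup and cost ratio follow directly from the numbers above (simple arithmetic, using the same assumed hourly prices):
In [ ]:
print("Speedup (CPU time / GPU time): %.1fx" % (84 / 11.0))
print("Cost ratio (CPU / GPU): %.1fx" % (CPU_Cost / GPU_Cost))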